# Read the training data
import json
import numpy


def processlabel(line):
    """Turn one JSON-lines record into a token / NER-tag example.

    The line is stripped and parsed as JSON. The parsed object must provide
    the keys "id", "data" and "label". Every element of "data" becomes one
    token, and "label" is iterated as (start, stop, tag) triples, each
    addressing the half-open slice [start, stop) of the tag array.

    A tag equal to "NN" writes 0 over its span. Any other tag writes
    tag2id["B-" + tag] at ``start`` and tag2id["I-" + tag] over the rest of
    the span. ``tag2id`` is looked up from module scope.

    Returns:
        None when the stripped line is empty; otherwise a dict with the keys
        "id", "tokens" (a list) and "ner_tags" (a numpy array with one entry
        per token).
    """
    stripped = line.strip()
    if not stripped:
        # Blank line: nothing to parse.
        return

    record = json.loads(stripped)
    data = record["data"]

    # NOTE(review): numpy.zeros defaults to float64, so the tag ids are
    # stored as floats -- confirm that callers cast them before use.
    tags = numpy.zeros(len(data))
    tokens = list(data)

    for begin, end, label in record["label"]:
        if label == "NN":
            tags[begin:end] = 0
            continue
        # The first position of the span gets the "B-" id, the rest "I-".
        tags[begin] = tag2id["B-" + label]
        tags[begin + 1:end] = tag2id["I-" + label]

    return {"id": record["id"], "tokens": tokens, "ner_tags": tags}
